## lynching-ss-replication.R - code to generate results for lynching paper
## 2023-07-27 - created file that replicates Security Studies R&R submission
library(sp)
library(spdep)
library(rgdal)
library(stargazer)
library(ggplot2)
library(stargazer)
library(mgcv)
library(tidyverse)
library(stringi)
library(dplyr)
library(MatchIt)
library(spatialreg)
library(rgenoud)
library(WeightIt)
library(osqp)
library(cobalt)
library(lwgeom)
library(sf)
library(maps)

# Set working directory (change path to location of unzipped folder)
setwd()

# Load data
load("main_data.Rdata")

###create list of Southern States
st.list <- c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")

###create base formula
#full sample
base.form <- formula(. ~  I(log1p(lynch_rate)) + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb)

#matched sample
base.form.match <- formula(. ~  I(log1p(lynch_dummy)) + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb)

#matched sample for low slaveholding counties
base.form.match.2 <- formula(. ~  I(log1p(lynch_dummy)) + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb)

#weighted sample for low slaveholding counties
base.form.weight <- formula(. ~  I(log1p(tolnay_seguin_lynch_rate)) + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb)

###Models for Table 2: Main results
##Model 1-bivarate
lynch.model <- lm(log(kfr_black_pooled_p25) ~ I(log1p(lynch_rate)) + state.abb,
                  data = main_data,
                  weights = sample.size,
                  subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model)

##Model 2-Main model
lynch.model.main <- lm(update(base.form, log(kfr_black_pooled_p25)~ .),
                       data = main_data,
                       weights = sample.size,
                       subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.main)

##model 3- Moran Eigenvectors
#transform data to poly data

#load county poly data
counties <- st_as_sf(maps::map("county", plot = FALSE, fill = TRUE))

#unite state and county so you an merge
unit_moran <- main_data %>%
  unite(ID, state_names, county_names, sep =",")

#merge 
lynch_moran_poly <- sp::merge(counties, unit_moran, by = "ID", all.x=T,all.y=F)

##create the geocoded data
sf::sf_use_s2(FALSE)
X<-lynch_moran_poly
W_nb <- poly2nb(X, queen = T)
#install.packages('spDataLarge', repos='https://nowosad.github.io/drat/', type='source')

##model 3- moran 
lynch.SFE <- SpatialFiltering(update(base.form, log(kfr_black_pooled_p25) ~ .),
                              data = X,
                              nb = W_nb,
                              style = "W",
                              ExactEV = TRUE,
                              zero.policy = TRUE)

lynch.model.SFE <- lm(update(base.form, log(kfr_black_pooled_p25) ~ . + fitted(lynch.SFE)),
                      data = X,
                      weights = sample.size,
                      subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA", "WV")));summary(lynch.model.SFE )

##Produce table 2
stargazer(lynch.model, 
          lynch.model.main, 
          title="Impact of lynching on Intergenerational Mobility", 
          dep.var.labels="Intergenerational Mobility", 
          covariate.labels=c("Lynch rate", "Prop. slave, 1860","County Area, 2000","Latitude, 2000","Latitude squared, 2000","Longitude, 2000", " Longitude squared, 2000","Ruggedness", " Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", " Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), 
          no.space=TRUE)


######Models for Table 3 - Impact of lynching on Males and Females
##model 1-male
lynch.model.male <- lm(update(base.form, log(kfr_black_male_p25)~ .),
                       data = main_data,
                       weights = sample.size,
                       subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.male)

##model 2- female
lynch.model.female <- lm(update(base.form, log(kfr_black_female_p25)~ .),
                         data = main_data, 
                         weights = sample.size, 
                         subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.female)

##Produce table 3
stargazer(lynch.model.male, 
          lynch.model.female,  
          title="Effect of lynching on Males and Females", 
          dep.var.labels="Intergenerational Mobility", 
          covariate.labels=c("Lynch rate", "Prop. slave, 1860","County Area, 2000","Latitude, 2000","Latitude squared, 2000","Longitude, 2000", "Longitude squared, 2000","Ruggedness", "Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", "Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), 
          no.space=TRUE)

###Models for Table 4 - southern matched and weighted sample
##Create matched sample
subsample_df <- main_data %>% dplyr::select(kfr_black_pooled_p25, lynch_count,  pslave1860, coarea00, latitude, longitude, rugged, land.ineq1860, sfarmprop1860, totpop1860, fvalpac1860, acimp1860, fbprop1860, rail1860, water1860, south, state.abb, lynch_rate)

#create lynch dummy
subsample_df <- subsample_df %>% mutate(lynch_dummy =
                                          case_when(lynch_count >= 1 ~ 1, 
                                                    lynch_count < 1 ~0)
)

#remove NAs
subsample_df <- na.omit(subsample_df)

#subset data to only southern states
STATE <-c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")
subsample_df <- subsample_df %>% filter(state.abb %in% STATE)

##produce matched sample using genetic matching
m.out <- matchit(lynch_dummy ~ pslave1860 + coarea00 + latitude + longitude + rugged  + land.ineq1860 + sfarmprop1860 + totpop1860 + fvalpac1860 + acimp1860 + fbprop1860  + rail1860 + water1860 + south, data = subsample_df,
                 method = "genetic",
                 mahvars = ~ pslave1860 + totpop1860 + fvalpac1860 +  coarea00,
                 caliper = c(.03, pslave1860 = 0.4932, totpop1860 = 12941, fbprop1860 = 0.0110685),
                 std.caliper = c(TRUE, FALSE, FALSE, FALSE),
                 pop.size = 1000)
#check balance
summary(m.out)

#produce figure B1.a: mean plot
love.plot(m.out, 
          drop.distance = TRUE, 
          var.order = "unadjusted",
          abs = TRUE,
          line = TRUE, 
          thresholds = c(m = .1),
          var.names = v,
          colors = c("red", "blue"),
          shapes = c("triangle filled", "circle filled"),
          sample.names = c("Unmatched", "Matched"),
          limits = c(0, .82),
          position = c(.75, .25)) +
  theme(legend.box.background = element_rect(), 
        legend.box.margin = margin(1, 1, 1, 1))

#create matched df
m_data_genetic<- match.data(m.out)

##model for matched sample
lynch.model.genetic.match <- lm(update(base.form.match, log(kfr_black_pooled_p25)~ .), 
                                data = m_data_genetic, 
                                weights = sample.size, 
                                subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.genetic.match)


##Weighted sample
W <- weightit(lynch_rate ~  pslave1860 + coarea00 + latitude + longitude + rugged  + land.ineq1860 + sfarmprop1860 + totpop1860 + fvalpac1860 + acimp1860 + fbprop1860  + rail1860 + water1860 +south, 
              data = subsample_df, 
              method = "energy")

#Bring weights into the dataset
subsample_df$weights <- W$weights

#produce figure B1.b: mean plot
love.plot(W, thresholds = c(m = .1), 
          var.order = "unadjusted", var.names = v, abs = TRUE,
          shapes = c("triangle filled", "circle"), 
          colors = c("red", "blue"), line = TRUE,
          grid = FALSE, sample.names = c("Unweighted", "Weighted"),
          stars = "raw", position = "top")

#fit model with weights
lynch.model.weighted <- lm(update(base.form, log(kfr_black_pooled_p25)~ .), 
                           data = subsample_df, 
                           weights = weights);summary(lynch.model.weighted)

##Produce table 4
stargazer(lynch.model.genetic.match, 
          lynch.model.weighted, 
          title="Matched and Weighted sample results, including all covariates", 
          covariate.labels=c("Lynch Dummy", "Lynch Rate", " County Area, 2000"," Latitude, 2000","Latitude squared, 2000","Longitude, 2000", " Longitude squared, 2000","Ruggedness", " Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", " Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), 
          no.space=TRUE)

###Models for Table C.1- Alternative measures of lynchings
##Model 1- count
lynch.model.full.count <- lm(log(kfr_black_pooled_p25) ~ lynch_count + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb, 
                             data = main_data, 
                             weights = sample.size, 
                             subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.full.count)

##Model 2-per total 10000 residents
lynch.model.per.resident <- lm(log(kfr_black_pooled_p25) ~ I(log1p(lynch_rate_per_10000_residents)) + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb, 
                               data = main_data, 
                               weights = sample.size, 
                               subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.per.resident)

#Produce table C.1
stargazer(lynch.model.full.count, lynch.model.per.resident, dep.var.labels="Intergenerational Mobility", covariate.labels=c("Lynch count, 1865-1940","Lynchings per 10,000 1920 Residents, 1882-1930", " Prop. slave, 1860"," County Area, 2000"," Latitude, 2000","Latitude squared, 2000","Longitude, 2000", " Longitude squared, 2000","Ruggedness", " Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", " Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), 
          no.space=TRUE)


###Models for Table D.1- Alternative measures of the South
##Model 1- Census south

##Model 2-Main model
lynch.model.census.south <- lm(update(base.form, log(kfr_black_pooled_p25)~ .),
                               data = main_data,
                               weights = sample.size,
                               subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "DE", "GA", "FL", "KY", "LA","OK", "MD", "MS", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.census.south)

##Model 2- Deep south
lynch.model.deep.south <- lm(update(base.form, log(kfr_black_pooled_p25)~ .),
                             data = main_data,
                             weights = sample.size,
                             subset = state.abb %in% st.list & (state.abb %in% c("AL", "GA", "LA", "MS", "SC")));summary(lynch.model.deep.south)



stargazer(lynch.model.census.south, lynch.model.deep.south, 
          title="Alternative measure of the South", 
          dep.var.labels="Intergenerational Mobility", 
          covariate.labels=c("Lynch rate", " Prop. slave, 1860"," County Area, 2000"," Latitude, 2000","Latitude squared, 2000","Longitude, 2000", " Longitude squared, 2000","Ruggedness", " Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", " Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), 
          no.space=TRUE)


###Models for Table E.1- low slaveholding southern and northern counties
##Create matched sample
subsample_low_slaveholding_df <- main_data %>% dplyr::select(kfr_black_pooled_p25, tolnay_seguin_lynch_count,  pslave1860, coarea00, latitude, longitude, rugged, land.ineq1860, sfarmprop1860, totpop1860, fvalpac1860, acimp1860, fbprop1860, rail1860, water1860, south, state.abb, tolnay_seguin_lynch_rate)

#create lynch dummy
subsample_low_slaveholding_df <- subsample_low_slaveholding_df %>% mutate(lynch_dummy =
                                                                            case_when(tolnay_seguin_lynch_count >= 1 ~ 1, 
                                                                                      tolnay_seguin_lynch_count < 1 ~0)
)

#remove NAs
subsample_low_slaveholding_df <- na.omit(subsample_low_slaveholding_df)

#subset data to only include southern and northern bordering states
STATE <-c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV", "IA", "IL", "IN", "OH", "PA", "DE", "MD", "NJ", "NY")

subsample_low_slaveholding_df <- subsample_low_slaveholding_df %>% filter(state.abb %in% STATE)

#cut top ten  percent of slaveholding counties
slave.cut <- quantile(subsample_low_slaveholding_df$pslave, na.rm = TRUE, probs = .9)

subsample_low_slaveholding_df <-subsample_low_slaveholding_df %>% filter(pslave1860 <= slave.cut)

dim(subsample_low_slaveholding_df)

##produce matched sample using genetic matching
m.out2 <- matchit(lynch_dummy ~ coarea00 + latitude + longitude + rugged  + land.ineq1860 + sfarmprop1860 + totpop1860 + fvalpac1860 + acimp1860 + fbprop1860  + rail1860 + water1860, data = seguin.inter.gen.match,
                  method = "genetic",
                  mahvars = ~ totpop1860 + fvalpac1860 +  coarea00,
                  caliper = c(.03, totpop1860 = 19185.0, fbprop1860 = 0.0147407),
                  std.caliper = c(TRUE, FALSE, FALSE),
                  pop.size = 1000)
#check balance
summary(m.out2)

#produce figure E1.a: mean plot
love.plot(m.out2, 
          drop.distance = TRUE, 
          var.order = "unadjusted",
          abs = TRUE,
          line = TRUE, 
          thresholds = c(m = .1),
          var.names = v,
          colors = c("red", "blue"),
          shapes = c("triangle filled", "circle filled"),
          sample.names = c("Unmatched", "Matched"),
          limits = c(0, .82),
          position = c(.75, .25)) +
  theme(legend.box.background = element_rect(), 
        legend.box.margin = margin(1, 1, 1, 1))

#create matched df
m_data_genetic_2<- match.data(m.out2)

#model for dummy
##model for matched sample
lynch.model.genetic.match.2 <- lm(update(base.form.match.2, log(kfr_black_pooled_p25)~ .), 
                                  data = m_data_genetic, 
                                  weights = sample.size);summary(lynch.model.genetic.match.2 )


##Weighted sample

W.2 <- weightit(tolnay_seguin_lynch_rate ~ coarea00 + latitude + longitude + rugged  + land.ineq1860 + sfarmprop1860 + totpop1860 + fvalpac1860 + acimp1860 + fbprop1860  + rail1860 + water1860 +south,
                data = subsample_low_slaveholding_df,
                method = "energy")
W.2

#Bring weights into the dataset
subsample_low_slaveholding_df$weights <-  W.2$weights

#produce figure B1.b: mean plot
love.plot(W.2, thresholds = c(m = .1), 
          var.order = "unadjusted", var.names = v, abs = TRUE,
          shapes = c("triangle filled", "circle"), 
          colors = c("red", "blue"), line = TRUE,
          grid = FALSE, sample.names = c("Unweighted", "Weighted"),
          stars = "raw", position = "top")

#fit model with weights
lynch.model.weighted.2 <- lm(update(base.form.weight, log(kfr_black_pooled_p25)~ .), 
                             data = subsample_low_slaveholding_df, 
                             weights = weights);summary(lynch.model.weighted.2)

##Produce table B.1
stargazer(lynch.model.genetic.match.2, 
          lynch.model.weighted.2, 
          title="Association is robust to restricting sample to low-slaveholding counties",
          covariate.labels=c("Lynch Dummy", "Lynch Rate", " County Area, 2000"," Latitude, 2000","Latitude squared, 2000","Longitude, 2000", " Longitude squared, 2000","Ruggedness", " Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", " Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), 
          no.space=TRUE)

###Models for Table F.1- different time periods
###jim crow
lynch.model.recontruction <- lm(log(kfr_black_pooled_p25) ~ I(log1p(reconstruction_lynch_rate)) + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb, 
                                data = main_data, 
                                weights = sample.size, 
                                subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.recontruction)

###1877 to 1910
lynch.model.1877.1910 <- lm(log(kfr_black_pooled_p25) ~ I(log1p(before_migration_lynch_rate)) + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb, 
                            data = main_data, 
                            weights = sample.size, 
                            subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.1877.1910)

###1910 to 1940
lynch.model.1910.1940 <- lm(log(kfr_black_pooled_p25) ~ I(log1p(during_migration_lynch_rate)) + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb, 
                            data = main_data, 
                            weights = sample.size, 
                            subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.1910.1940)

#Produce table F.1
stargazer(lynch.model.recontruction, 
          lynch.model.1877.1910, 
          lynch.model.1910.1940,  
          title="Effect of lynching across different time periods", 
          dep.var.labels="Intergenerational Mobility", 
          covariate.labels=c("Lynch rate (reconstruction)","Lynch rate (before migration)","Lynch rate (during migration)", "Prop. slave, 1860","County Area, 2000","Latitude, 2000","Latitude squared, 2000","Longitude, 2000", " Longitude squared, 2000","Ruggedness", " Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", " Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), 
          no.space=TRUE
)

###Models for Table G.1- rural counties
##remove top 10 percent
den.cut <- quantile(main_data $pop_den_1860, na.rm = TRUE, probs = .9)

pop_den_remove_df <-main_data %>% filter(pop_den_1860<= den.cut)

##model 1
lynch.model.rural <- lm(log(kfr_black_pooled_p25) ~ I(log1p(lynch_rate)) + pslave1860 + log(coarea00) + latitude + I(latitude^2) + longitude + I(longitude^2)+ rugged  + land.ineq1860 + sfarmprop1860 + log(totpop1860) + log(fvalpac1860) + log(acimp1860) + fbprop1860  + rail1860 + water1860 + state.abb, 
                        data = pop_den_remove_df, 
                        weights = sample.size, 
                        subset = state.abb %in% st.list & (state.abb %in% c("AL", "AR", "GA", "FL", "KY", "LA", "MS", "MO", "NC", "SC", "TN", "TX", "VA","WV")));summary(lynch.model.rural)

#Produce table G.1
stargazer(lynch.model.rural,  
          title="Lynching effect remains after restricting sample to rural counties", 
          dep.var.labels="Intergenerational Mobility", 
          covariate.labels=c("Percentage lynch, 1865-1940", " Prop. slave, 1860"," County Area, 2000"," Latitude, 2000","Latitude squared, 2000","Longitude, 2000", " Longitude squared, 2000","Ruggedness", " Gini coefficient, 1860 ", "Prop. small farms, 1860", "Total population, 1860 ", " Farm value per capita, 1860", "Total improved acreage, 1860 ", "Prop. free black, 1860", "Rail access, 1860", "Water access, 1860"), 
          omit.stat=c("LL","f"), no.space=TRUE)

